import scrapy
import redis
import json


class HospitalSpider(scrapy.Spider):
    """Crawl haodf.com's hospital directory.

    Flow: national index page -> province links (``parse``) -> per-province
    hospital lists (``parse_province``). Each hospital found is serialized as
    a JSON task and pushed onto the Redis list ``task_queue`` for downstream
    workers; this spider itself yields no items.
    """

    name = "hospital"
    # Shared client; decode_responses=True returns str instead of bytes.
    # NOTE(review): host/port fall back to localhost:6379 — confirm this
    # matches the deployment configuration.
    redis_conn = redis.Redis(decode_responses=True, db=3)

    def start_requests(self):
        """Seed the crawl with the national hospital index page."""
        # dont_filter=True matches Scrapy's convention for start requests
        # (and this spider's own usage in parse) so the seed is never
        # dropped by the dupefilter.
        yield scrapy.Request(
            url="https://www.haodf.com/hospital/list-11.html",
            dont_filter=True,
        )

    def parse(self, response):
        """Extract province name + link from the region tree and follow each."""
        a_list = response.xpath(
            "//div[@id='el_tree_1000000']/div[contains(@class,'kstl')]/a"
        )
        for a in a_list:
            # Province name and its (protocol-relative) link.
            province = a.xpath("./text()").get()
            href = a.xpath("./@href").get()
            if not href:
                # Skip malformed anchors instead of crashing on None.
                continue
            yield scrapy.Request(
                url="https:" + href,
                callback=self.parse_province,
                dont_filter=True,
                meta={"province": province},
            )

    def parse_province(self, response):
        """Enqueue one Redis task per hospital listed on a province page."""
        province = response.meta["province"]
        hospital_list = response.xpath("//div[@class='m_ctt_green']/ul/li/a")
        for hospital in hospital_list:
            # Hospital name and link.
            hospital_name = hospital.xpath("./text()").get()
            hospital_url = hospital.xpath("./@href").get()
            if not hospital_url:
                # Skip malformed anchors instead of crashing on None.
                continue
            # Rewrite the hospital home URL into its department-list URL.
            hospital_url = hospital_url.replace(".html", "") + "/keshi/list.html"
            task = {
                "url": hospital_url,
                "meta": {
                    "province": province,
                    "hospital_name": hospital_name,
                },
            }
            # ensure_ascii=False keeps Chinese names human-readable in Redis.
            self.redis_conn.rpush("task_queue", json.dumps(task, ensure_ascii=False))
